import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import load_model
# Read the historical Oregon fire records from CSV into a pandas DataFrame.
fires_csv_path = './data/historical_oregon_fires.csv'
historical_fires = pd.read_csv(fires_csv_path, low_memory=False)
# (rows, columns) of the raw table
historical_fires.shape
(64053, 89)
# Summary statistics of the numeric columns, transposed so that each column is shown as one row.
historical_fires.describe().T
| | count | mean | std | min | 25% | 50% | 75% | max |
|---|---|---|---|---|---|---|---|---|
| X | 64053.0 | 7.703456e+05 | 5.145695e+05 | 0.0000 | 492431.605971 | 648903.492782 | 1.036540e+06 | 2.330063e+06 |
| Y | 64053.0 | 6.554200e+05 | 4.872065e+05 | 0.0000 | 236353.876969 | 575697.853675 | 1.076593e+06 | 1.656678e+06 |
| OBJECTID | 64053.0 | 3.202700e+04 | 1.849065e+04 | 1.0000 | 16014.000000 | 32027.000000 | 4.804000e+04 | 6.405300e+04 |
| Serial | 64053.0 | 4.987656e+04 | 2.812513e+04 | 10.0000 | 29500.000000 | 45519.000000 | 6.766000e+04 | 1.155570e+05 |
| FireYear | 64053.0 | 1.988937e+03 | 1.686442e+01 | 1960.0000 | 1974.000000 | 1989.000000 | 2.003000e+03 | 2.019000e+03 |
| FiscalYear | 64053.0 | 1.989727e+03 | 1.685892e+01 | 1960.0000 | 1975.000000 | 1990.000000 | 2.004000e+03 | 2.020000e+03 |
| FireArea | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Current_District | 64053.0 | 7.864742e+01 | 1.496438e+01 | 51.0000 | 71.000000 | 73.000000 | 9.500000e+01 | 9.900000e+01 |
| Current_Unit | 64053.0 | 7.883178e+02 | 1.500064e+02 | 511.0000 | 711.000000 | 732.000000 | 9.540000e+02 | 9.910000e+02 |
| FireCategory | 64053.0 | 1.000000e+00 | 0.000000e+00 | 1.0000 | 1.000000 | 1.000000 | 1.000000e+00 | 1.000000e+00 |
| NumberPersonnel | 8742.0 | 6.891329e+00 | 1.379872e+01 | 0.0000 | 2.000000 | 4.000000 | 8.000000e+00 | 6.000000e+02 |
| Sec | 64017.0 | 1.656105e+01 | 1.145511e+01 | 0.0000 | 6.000000 | 16.000000 | 2.700000e+01 | 3.600000e+01 |
| Longitude | 56852.0 | -1.221674e+02 | 1.760223e+00 | -124.5570 | -123.385000 | -122.809000 | -1.214570e+02 | -1.165210e+02 |
| Latitude | 56853.0 | 4.374980e+01 | 1.247257e+00 | 41.8808 | 42.527600 | 43.612800 | 4.485010e+01 | 4.624220e+01 |
| Protection_agency | 63855.0 | 1.112442e+00 | 7.146813e-01 | 0.0000 | 1.000000 | 1.000000 | 1.000000e+00 | 9.000000e+00 |
| Federal_lands | 52339.0 | 1.137832e+00 | 4.811332e-01 | 1.0000 | 1.000000 | 1.000000 | 1.000000e+00 | 3.000000e+00 |
| Land_Class | 63851.0 | 1.781476e+00 | 1.830610e+00 | 0.0000 | 1.000000 | 1.000000 | 1.000000e+00 | 1.100000e+01 |
| Land_Class_tg | 28852.0 | 1.135103e+00 | 3.418398e-01 | 1.0000 | 1.000000 | 1.000000 | 1.000000e+00 | 2.000000e+00 |
| Minimum | 64053.0 | 1.129689e-01 | 3.165573e-01 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 1.000000e+00 |
| Dual | 64053.0 | 1.093626e-01 | 3.120960e-01 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 1.000000e+00 |
| County | 64035.0 | 1.643871e+01 | 8.441621e+00 | 1.0000 | 10.000000 | 17.000000 | 2.000000e+01 | 9.900000e+01 |
| FO_Land_Owner | 63964.0 | 1.828471e+01 | 1.595216e+01 | 0.0000 | 10.000000 | 10.000000 | 2.000000e+01 | 9.000000e+01 |
| SurchargeLot | 64053.0 | 9.033144e-02 | 2.866583e-01 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 1.000000e+00 |
| SurchargeLotAssessed | 64053.0 | 5.781150e-02 | 2.333885e-01 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 1.000000e+00 |
| SB_360_Lot | 12191.0 | 6.353868e-01 | 8.680971e-01 | 0.0000 | 0.000000 | 0.000000 | 2.000000e+00 | 2.000000e+00 |
| SB_360_Liable | 1379.0 | 3.930384e-01 | 4.886024e-01 | 0.0000 | 0.000000 | 0.000000 | 1.000000e+00 | 1.000000e+00 |
| Discovered_By | 63840.0 | 8.514474e+00 | 1.715136e+01 | 1.0000 | 3.000000 | 6.000000 | 8.000000e+00 | 9.900000e+01 |
| Caused_by | 63854.0 | 5.707144e+00 | 4.607953e+00 | 0.0000 | 1.000000 | 5.000000 | 9.000000e+00 | 5.800000e+01 |
| General | 64049.0 | 4.091321e+00 | 2.651601e+00 | 0.0000 | 1.000000 | 4.000000 | 6.000000e+00 | 1.000000e+01 |
| Specific | 63853.0 | 4.301206e+02 | 2.771839e+02 | 0.0000 | 101.000000 | 402.000000 | 6.200000e+02 | 9.990000e+02 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Notif_number | 1871.0 | 2.549896e+07 | 1.886472e+08 | 0.0000 | 389.500000 | 30094.000000 | 6.163350e+04 | 1.871208e+09 |
| Walk_In_Delay | 12183.0 | 9.069605e+00 | 4.201296e+02 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 3.276700e+04 |
| DS_Cost | 63822.0 | 9.738210e+02 | 2.238572e+04 | 0.0000 | 43.000000 | 139.000000 | 4.612355e+02 | 4.098516e+06 |
| ES_Cost | 63820.0 | 1.083201e+04 | 3.282855e+05 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 3.697226e+07 |
| PC_Cost | 63817.0 | 3.099745e+02 | 8.056938e+03 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 9.077680e+05 |
| OA_Cost | 63817.0 | 6.464702e+03 | 2.737810e+05 | 0.0000 | 0.000000 | 0.000000 | 3.300000e+01 | 3.000000e+07 |
| TotalCost | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Est_Damage | 57886.0 | 5.517955e+03 | 2.733955e+05 | -478.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 4.443510e+07 |
| CostRecovery | 64053.0 | 1.842224e-03 | 4.288192e-02 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 1.000000e+00 |
| FEMA | 64053.0 | 4.744508e-02 | 2.125906e-01 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 1.000000e+00 |
| AttackBy | 63838.0 | 3.422899e+00 | 1.078328e+01 | 1.0000 | 1.000000 | 1.000000 | 2.000000e+00 | 9.900000e+01 |
| AttackType | 63832.0 | 2.638395e+00 | 1.537800e+00 | 0.0000 | 1.000000 | 3.000000 | 4.000000e+00 | 9.000000e+00 |
| Flame_length | 16760.0 | 1.400477e+00 | 8.673592e-01 | 1.0000 | 1.000000 | 1.000000 | 2.000000e+00 | 6.000000e+00 |
| Size_at_attack | 29036.0 | 5.499523e+00 | 3.296863e+02 | 0.0000 | 0.010000 | 0.100000 | 2.500000e-01 | 5.081500e+04 |
| Behavior | 28810.0 | 2.110760e+00 | 1.445836e+00 | 1.0000 | 1.000000 | 2.000000 | 2.000000e+00 | 9.000000e+00 |
| Fuel | 63844.0 | 2.375573e+01 | 3.722856e+01 | 1.0000 | 3.000000 | 7.000000 | 1.200000e+01 | 9.900000e+01 |
| Topography | 64053.0 | 3.013286e-01 | 4.588387e-01 | 0.0000 | 0.000000 | 0.000000 | 1.000000e+00 | 1.000000e+00 |
| Aspect | 64053.0 | 3.016252e-01 | 4.589670e-01 | 0.0000 | 0.000000 | 0.000000 | 1.000000e+00 | 1.000000e+00 |
| Slope | 64053.0 | 3.016096e-01 | 4.589603e-01 | 0.0000 | 0.000000 | 0.000000 | 1.000000e+00 | 1.000000e+00 |
| Elevation | 64053.0 | 3.011569e-01 | 4.587643e-01 | 0.0000 | 0.000000 | 0.000000 | 1.000000e+00 | 1.000000e+00 |
| Size_class | 64052.0 | 1.371792e+00 | 7.271433e-01 | 1.0000 | 1.000000 | 1.000000 | 2.000000e+00 | 7.000000e+00 |
| Size_acres | 63851.0 | 8.138700e+01 | 3.348425e+03 | 0.0000 | 0.050000 | 0.100000 | 5.000000e-01 | 4.999450e+05 |
| Size_prot | 64046.0 | 2.029059e+01 | 4.610015e+02 | 0.0000 | 0.020000 | 0.100000 | 5.000000e-01 | 4.336800e+04 |
| Homes_Saved | 12212.0 | 1.371029e+00 | 2.253093e+01 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 1.221000e+03 |
| Homes_Lost | 12212.0 | 2.030789e-02 | 4.760210e-01 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 3.500000e+01 |
| Structures_Saved | 12212.0 | 1.378235e+00 | 2.185595e+01 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 1.000000e+03 |
| Structures_Lost | 12212.0 | 5.707501e-02 | 9.832442e-01 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 6.000000e+01 |
| Number_of_Injuries | 12212.0 | 7.443498e-02 | 6.343492e+00 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 7.000000e+02 |
| Number_of_Deaths | 12212.0 | 1.719620e-03 | 7.293910e-02 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 7.000000e+00 |
| CauseType | 64053.0 | 1.430222e-01 | 6.179719e-01 | 0.0000 | 0.000000 | 0.000000 | 0.000000e+00 | 3.000000e+00 |
66 rows × 8 columns
# Pairwise correlation between the numeric columns.
# The raw frame also holds non-numeric columns (89 columns in total, of which
# only 66 are numeric). pandas >= 2.0 no longer skips those silently and raises
# instead, so restrict the frame to numeric/boolean columns explicitly.
historical_fires.select_dtypes(include=['number', 'bool']).corr()
| | X | Y | OBJECTID | Serial | FireYear | FiscalYear | FireArea | Current_District | Current_Unit | FireCategory | ... | Size_class | Size_acres | Size_prot | Homes_Saved | Homes_Lost | Structures_Saved | Structures_Lost | Number_of_Injuries | Number_of_Deaths | CauseType |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| X | 1.000000 | 0.569673 | -0.046110 | -0.013913 | 0.270544 | 0.271425 | NaN | 0.582209 | 0.582790 | NaN | ... | 0.041224 | 0.014888 | 0.014578 | 0.018966 | 0.013644 | 0.038826 | 0.032316 | -0.003633 | -0.007731 | -0.004456 |
| Y | 0.569673 | 1.000000 | -0.075587 | -0.029982 | 0.249519 | 0.250744 | NaN | -0.035758 | -0.034135 | NaN | ... | 0.033210 | 0.001582 | -0.001402 | -0.000656 | -0.013638 | 0.011960 | 0.002634 | -0.005990 | 0.003807 | 0.012365 |
| OBJECTID | -0.046110 | -0.075587 | 1.000000 | 0.975739 | 0.387223 | 0.386539 | NaN | 0.061891 | 0.061911 | NaN | ... | 0.011245 | 0.021108 | 0.020731 | 0.009734 | 0.020665 | 0.015485 | 0.021889 | -0.003186 | 0.002646 | 0.377476 |
| Serial | -0.013913 | -0.029982 | 0.975739 | 1.000000 | 0.512250 | 0.511511 | NaN | 0.025019 | 0.025053 | NaN | ... | 0.014625 | 0.022499 | 0.022293 | 0.010475 | 0.022059 | 0.017169 | 0.023424 | -0.003854 | 0.002525 | 0.488493 |
| FireYear | 0.270544 | 0.249519 | 0.387223 | 0.512250 | 1.000000 | 0.999701 | NaN | -0.035637 | -0.035524 | NaN | ... | -0.001244 | 0.022978 | 0.016213 | 0.009203 | 0.021398 | 0.015568 | 0.021995 | -0.003787 | 0.001735 | 0.389611 |
| FiscalYear | 0.271425 | 0.250744 | 0.386539 | 0.511511 | 0.999701 | 1.000000 | NaN | -0.035843 | -0.035731 | NaN | ... | -0.000912 | 0.023272 | 0.016663 | 0.011074 | 0.023118 | 0.017463 | 0.024769 | -0.003147 | 0.002669 | 0.388431 |
| FireArea | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Current_District | 0.582209 | -0.035758 | 0.061891 | 0.025019 | -0.035637 | -0.035843 | NaN | 1.000000 | 0.999978 | NaN | ... | 0.021834 | 0.010784 | 0.018210 | 0.030827 | 0.008614 | 0.038056 | 0.023616 | -0.002401 | -0.012425 | -0.044790 |
| Current_Unit | 0.582790 | -0.034135 | 0.061911 | 0.025053 | -0.035524 | -0.035731 | NaN | 0.999978 | 1.000000 | NaN | ... | 0.022103 | 0.010873 | 0.018298 | 0.031090 | 0.008595 | 0.038264 | 0.023702 | -0.002380 | -0.012483 | -0.044747 |
| FireCategory | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| NumberPersonnel | 0.050067 | -0.011351 | 0.038979 | 0.039914 | 0.038630 | 0.042701 | NaN | 0.048268 | 0.048037 | NaN | ... | 0.456536 | 0.106450 | 0.174076 | 0.486013 | NaN | 0.726966 | 0.259737 | -0.003466 | NaN | NaN |
| Sec | 0.276743 | 0.241743 | -0.068289 | -0.018383 | 0.275254 | 0.275100 | NaN | -0.001605 | -0.001552 | NaN | ... | -0.016015 | 0.007551 | 0.000098 | 0.017810 | 0.015763 | 0.018883 | 0.014290 | -0.005701 | 0.009145 | 0.037712 |
| Longitude | 0.999621 | 0.403465 | 0.029330 | 0.005394 | -0.020575 | -0.019295 | NaN | 0.735470 | 0.736070 | NaN | ... | 0.068680 | 0.014185 | 0.023342 | 0.019023 | 0.013894 | 0.038717 | 0.032586 | -0.003546 | -0.008113 | -0.055553 |
| Latitude | 0.430500 | 0.999848 | -0.014095 | -0.015591 | -0.009079 | -0.007390 | NaN | -0.033575 | -0.031697 | NaN | ... | 0.054684 | -0.001556 | 0.001208 | -0.000363 | -0.013585 | 0.012202 | 0.002664 | -0.006026 | 0.003687 | -0.030219 |
| Protection_agency | 0.032814 | 0.069581 | 0.028075 | 0.036276 | 0.068306 | 0.068977 | NaN | -0.014353 | -0.014340 | NaN | ... | 0.135823 | 0.055002 | 0.032069 | 0.054253 | 0.073714 | 0.057667 | 0.086431 | 0.000206 | 0.004198 | 0.026403 |
| Federal_lands | -0.059000 | -0.091272 | -0.052258 | -0.047595 | -0.048299 | -0.048102 | NaN | -0.056846 | -0.057087 | NaN | ... | 0.075656 | 0.039387 | 0.025605 | 0.035157 | 0.014590 | 0.043083 | 0.033986 | -0.001039 | 0.030475 | -0.025069 |
| Land_Class | -0.003016 | 0.044372 | 0.055674 | 0.058401 | 0.007211 | 0.008302 | NaN | -0.031734 | -0.031625 | NaN | ... | 0.115639 | 0.061540 | 0.030654 | 0.065291 | 0.050525 | 0.074048 | 0.073065 | -0.001377 | 0.024840 | 0.053213 |
| Land_Class_tg | 0.157565 | -0.059960 | 0.037642 | 0.045899 | 0.055574 | 0.057065 | NaN | 0.140536 | 0.140132 | NaN | ... | 0.103653 | -0.001977 | -0.000250 | -0.002734 | -0.001286 | -0.000082 | 0.001886 | -0.004488 | -0.004179 | 0.036976 |
| Minimum | -0.033101 | 0.042316 | 0.091073 | 0.117014 | 0.326262 | 0.325246 | NaN | -0.101031 | -0.100726 | NaN | ... | -0.060317 | -0.007858 | -0.011600 | -0.017529 | -0.003337 | -0.019327 | 0.000293 | -0.005802 | -0.005715 | 0.138474 |
| Dual | -0.041756 | 0.031547 | 0.159198 | 0.187814 | 0.346062 | 0.345094 | NaN | -0.106424 | -0.106219 | NaN | ... | -0.033875 | -0.007613 | -0.010464 | -0.014816 | -0.004722 | -0.019048 | -0.003160 | -0.006050 | 0.013413 | 0.174779 |
| County | 0.268164 | 0.199976 | 0.002904 | -0.004643 | -0.010720 | -0.009931 | NaN | 0.229764 | 0.230056 | NaN | ... | 0.038845 | 0.003358 | 0.002829 | -0.000813 | 0.015776 | 0.002500 | 0.018109 | -0.006797 | 0.023838 | -0.006714 |
| FO_Land_Owner | -0.092466 | -0.076038 | 0.040559 | 0.046112 | 0.051165 | 0.051341 | NaN | -0.097165 | -0.097397 | NaN | ... | -0.000267 | 0.024035 | 0.020590 | 0.016357 | -0.001801 | 0.023054 | 0.006199 | 0.017379 | 0.013507 | 0.018109 |
| SurchargeLot | 0.003817 | 0.035827 | 0.274756 | 0.307456 | 0.361303 | 0.360186 | NaN | -0.072306 | -0.072007 | NaN | ... | -0.027278 | -0.006203 | -0.007550 | -0.018503 | 0.001143 | -0.016075 | 0.005061 | -0.006289 | -0.000029 | 0.212616 |
| SurchargeLotAssessed | 0.016206 | 0.027337 | 0.150192 | 0.159111 | 0.247901 | 0.247110 | NaN | -0.035084 | -0.034857 | NaN | ... | 0.010971 | 0.006559 | 0.011824 | 0.026464 | 0.018859 | 0.042898 | 0.037888 | -0.002809 | 0.007866 | 0.089671 |
| SB_360_Lot | -0.036957 | 0.305162 | -0.133812 | -0.143383 | -0.151897 | -0.151038 | NaN | -0.262374 | -0.262061 | NaN | ... | 0.008907 | 0.006421 | -0.005042 | -0.015476 | 0.004041 | 0.000030 | 0.006110 | -0.006837 | -0.009516 | -0.100448 |
| SB_360_Liable | -0.013105 | -0.053098 | 0.064047 | 0.063442 | 0.060411 | 0.065249 | NaN | -0.029532 | -0.028520 | NaN | ... | 0.006749 | 0.010001 | 0.030298 | 0.039566 | -0.017327 | 0.038527 | -0.000260 | -0.013745 | -0.030668 | 0.062793 |
| Discovered_By | -0.026297 | 0.010418 | -0.000592 | -0.000420 | -0.001773 | -0.002144 | NaN | -0.061890 | -0.061912 | NaN | ... | -0.010416 | 0.000289 | -0.002014 | -0.002955 | -0.002867 | -0.001944 | -0.001020 | -0.001028 | -0.001506 | 0.001609 |
| Caused_by | -0.180777 | 0.010206 | 0.034016 | 0.053407 | 0.094458 | 0.094952 | NaN | -0.232965 | -0.232800 | NaN | ... | -0.039441 | -0.018645 | -0.019359 | -0.017531 | -0.005667 | -0.022366 | -0.015159 | -0.010449 | 0.015604 | 0.099491 |
| General | -0.249815 | 0.001324 | -0.033927 | -0.014261 | 0.023173 | 0.021539 | NaN | -0.307928 | -0.307623 | NaN | ... | 0.051790 | -0.019316 | -0.018939 | -0.019199 | 0.013946 | -0.020348 | 0.006313 | -0.011420 | 0.011680 | 0.054432 |
| Specific | -0.253435 | 0.005833 | -0.038920 | -0.020418 | 0.031330 | 0.029667 | NaN | -0.318561 | -0.318259 | NaN | ... | 0.054835 | -0.021936 | -0.023244 | -0.026318 | 0.010508 | -0.031812 | -0.001647 | -0.011893 | 0.011448 | 0.049000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Notif_number | -0.040257 | 0.003248 | 0.138567 | 0.165521 | 0.192332 | 0.191538 | NaN | -0.063862 | -0.063995 | NaN | ... | 0.011822 | -0.006775 | -0.006475 | -0.008141 | NaN | -0.007303 | -0.008921 | -0.019120 | NaN | 0.219439 |
| Walk_In_Delay | 0.010980 | 0.006515 | -0.000970 | -0.001418 | -0.002133 | -0.001175 | NaN | 0.007946 | 0.007988 | NaN | ... | -0.004820 | 0.001410 | 0.000592 | 0.000508 | -0.000354 | 0.000172 | -0.000054 | -0.000211 | -0.000349 | 0.005575 |
| DS_Cost | 0.006140 | 0.005339 | 0.029744 | 0.037114 | 0.040191 | 0.040404 | NaN | -0.000909 | -0.000864 | NaN | ... | 0.103026 | 0.148176 | 0.140773 | 0.158965 | 0.118909 | 0.172102 | 0.225208 | 0.004031 | 0.001656 | 0.037195 |
| ES_Cost | 0.004406 | -0.005804 | 0.028985 | 0.034103 | 0.036039 | 0.036378 | NaN | 0.003722 | 0.003754 | NaN | ... | 0.203951 | 0.117127 | 0.538398 | 0.208646 | 0.088508 | 0.124191 | 0.110395 | 0.007333 | -0.000435 | 0.017519 |
| PC_Cost | -0.016291 | 0.009639 | -0.012612 | -0.010287 | -0.003859 | -0.003905 | NaN | -0.021788 | -0.021815 | NaN | ... | 0.123426 | 0.037992 | 0.058478 | 0.148666 | 0.085062 | 0.143068 | 0.213207 | 0.001818 | -0.001112 | -0.004021 |
| OA_Cost | 0.028216 | 0.010032 | 0.025436 | 0.028733 | 0.028818 | 0.029104 | NaN | 0.020714 | 0.020819 | NaN | ... | 0.166249 | 0.210716 | 0.204437 | 0.222987 | 0.309615 | 0.148830 | 0.249992 | 0.005906 | 0.055465 | 0.011535 |
| TotalCost | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| Est_Damage | 0.002900 | -0.005078 | 0.011167 | 0.013746 | 0.016641 | 0.016848 | NaN | -0.000590 | -0.000585 | NaN | ... | 0.129803 | 0.140441 | 0.361981 | 0.235642 | 0.216242 | 0.225525 | 0.206857 | 0.007358 | 0.010977 | 0.000985 |
| CostRecovery | -0.003576 | -0.001158 | 0.064246 | 0.078343 | 0.065573 | 0.065827 | NaN | -0.005411 | -0.005341 | NaN | ... | 0.115726 | 0.169630 | 0.163015 | 0.313700 | 0.147682 | 0.264737 | 0.210603 | 0.004544 | -0.002319 | 0.066646 |
| FEMA | -0.006543 | 0.054456 | 0.077677 | 0.096058 | 0.211812 | 0.211446 | NaN | -0.056832 | -0.056657 | NaN | ... | 0.113937 | 0.009724 | 0.012814 | 0.021550 | 0.003644 | 0.020767 | 0.020403 | -0.002090 | -0.004512 | 0.111274 |
| AttackBy | -0.016568 | 0.004565 | -0.038658 | -0.036805 | -0.016919 | -0.016847 | NaN | -0.033194 | -0.033177 | NaN | ... | -0.012994 | -0.001331 | -0.004829 | -0.006988 | -0.004416 | -0.006900 | -0.003478 | -0.001988 | -0.003556 | -0.010751 |
| AttackType | 0.062004 | 0.120003 | 0.043191 | 0.070952 | 0.188141 | 0.188685 | NaN | -0.073261 | -0.072884 | NaN | ... | 0.131670 | 0.017868 | 0.022897 | 0.029022 | 0.033178 | 0.049811 | 0.062438 | 0.006727 | 0.000718 | 0.065722 |
| Flame_length | -0.030116 | -0.012313 | 0.024774 | 0.026842 | 0.031431 | 0.033724 | NaN | -0.041737 | -0.041676 | NaN | ... | 0.389546 | 0.050168 | 0.085713 | 0.137628 | 0.105252 | 0.149001 | 0.151168 | NaN | NaN | NaN |
| Size_at_attack | 0.023847 | 0.009183 | -0.004119 | -0.002668 | -0.001009 | -0.000670 | NaN | 0.017276 | 0.017314 | NaN | ... | 0.100735 | 0.100504 | 0.041833 | 0.004069 | 0.008096 | 0.008147 | 0.040697 | -0.000083 | -0.000595 | 0.002353 |
| Behavior | -0.030251 | 0.011625 | -0.033544 | -0.037446 | -0.042252 | -0.042496 | NaN | -0.006868 | -0.007063 | NaN | ... | 0.208912 | 0.035164 | 0.046629 | 0.057725 | 0.043267 | 0.052336 | 0.053623 | -0.004518 | 0.007543 | -0.028705 |
| Fuel | -0.400156 | -0.265852 | 0.039585 | -0.027520 | -0.450215 | -0.449569 | NaN | -0.080206 | -0.080425 | NaN | ... | -0.031521 | -0.010888 | -0.013374 | -0.013394 | 0.001123 | -0.013492 | -0.006558 | -0.001690 | 0.007884 | -0.086634 |
| Topography | 0.096087 | 0.068489 | -0.091927 | -0.085490 | 0.476654 | 0.476873 | NaN | -0.035946 | -0.035890 | NaN | ... | -0.029478 | 0.008816 | -0.000125 | 0.001980 | 0.016206 | -0.003902 | 0.004359 | -0.003955 | 0.014473 | 0.093577 |
| Aspect | 0.096190 | 0.068973 | -0.091296 | -0.084815 | 0.477251 | 0.477474 | NaN | -0.036552 | -0.036498 | NaN | ... | -0.029847 | 0.008798 | -0.000153 | 0.001915 | 0.016142 | -0.003965 | 0.004287 | -0.003966 | 0.014434 | 0.094116 |
| Slope | 0.096220 | 0.069228 | -0.091175 | -0.084710 | 0.477256 | 0.477477 | NaN | -0.036572 | -0.036516 | NaN | ... | -0.029783 | 0.008799 | -0.000151 | 0.001924 | 0.016142 | -0.003965 | 0.004287 | -0.003966 | 0.014434 | 0.094346 |
| Elevation | 0.095528 | 0.068704 | -0.091183 | -0.084958 | 0.476524 | 0.476756 | NaN | -0.037122 | -0.037066 | NaN | ... | -0.029993 | 0.008825 | -0.000115 | 0.001971 | 0.014600 | -0.004041 | 0.002813 | -0.003946 | 0.014506 | 0.092412 |
| Size_class | 0.041224 | 0.033210 | 0.011245 | 0.014625 | -0.001244 | -0.000912 | NaN | 0.021834 | 0.022103 | NaN | ... | 1.000000 | 0.179155 | 0.286786 | 0.279775 | 0.155975 | 0.284261 | 0.237754 | 0.005499 | 0.003354 | 0.004100 |
| Size_acres | 0.014888 | 0.001582 | 0.021108 | 0.022499 | 0.022978 | 0.023272 | NaN | 0.010784 | 0.010873 | NaN | ... | 0.179155 | 1.000000 | 0.328017 | 0.302674 | 0.198700 | 0.415370 | 0.364108 | 0.004140 | 0.006211 | 0.010768 |
| Size_prot | 0.014578 | -0.001402 | 0.020731 | 0.022293 | 0.016213 | 0.016663 | NaN | 0.018210 | 0.018298 | NaN | ... | 0.286786 | 0.328017 | 1.000000 | 0.324740 | 0.220377 | 0.390251 | 0.427034 | 0.011438 | 0.002197 | 0.005513 |
| Homes_Saved | 0.018966 | -0.000656 | 0.009734 | 0.010475 | 0.009203 | 0.011074 | NaN | 0.030827 | 0.031090 | NaN | ... | 0.279775 | 0.302674 | 0.324740 | 1.000000 | 0.279326 | 0.724782 | 0.354939 | 0.300181 | 0.010176 | 0.008187 |
| Homes_Lost | 0.013644 | -0.013638 | 0.020665 | 0.022059 | 0.021398 | 0.023118 | NaN | 0.008614 | 0.008595 | NaN | ... | 0.155975 | 0.198700 | 0.220377 | 0.279326 | 1.000000 | 0.365731 | 0.829151 | 0.008015 | 0.086264 | 0.013673 |
| Structures_Saved | 0.038826 | 0.011960 | 0.015485 | 0.017169 | 0.015568 | 0.017463 | NaN | 0.038056 | 0.038264 | NaN | ... | 0.284261 | 0.415370 | 0.390251 | 0.724782 | 0.365731 | 1.000000 | 0.483552 | 0.306562 | 0.004113 | 0.009780 |
| Structures_Lost | 0.032316 | 0.002634 | 0.021889 | 0.023424 | 0.021995 | 0.024769 | NaN | 0.023616 | 0.023702 | NaN | ... | 0.237754 | 0.364108 | 0.427034 | 0.354939 | 0.829151 | 0.483552 | 1.000000 | 0.007105 | 0.058010 | 0.010279 |
| Number_of_Injuries | -0.003633 | -0.005990 | -0.003186 | -0.003854 | -0.003787 | -0.003147 | NaN | -0.002401 | -0.002380 | NaN | ... | 0.005499 | 0.004140 | 0.011438 | 0.300181 | 0.008015 | 0.306562 | 0.007105 | 1.000000 | 0.000431 | -0.005187 |
| Number_of_Deaths | -0.007731 | 0.003807 | 0.002646 | 0.002525 | 0.001735 | 0.002669 | NaN | -0.012425 | -0.012483 | NaN | ... | 0.003354 | 0.006211 | 0.002197 | 0.010176 | 0.086264 | 0.004113 | 0.058010 | 0.000431 | 1.000000 | 0.012935 |
| CauseType | -0.004456 | 0.012365 | 0.377476 | 0.488493 | 0.389611 | 0.388431 | NaN | -0.044790 | -0.044747 | NaN | ... | 0.004100 | 0.010768 | 0.005513 | 0.008187 | 0.013673 | 0.009780 | 0.010279 | -0.005187 | 0.012935 | 1.000000 |
66 rows × 66 columns
# Columns that are not used as model inputs and are removed from the frame.
drop_columns = [
    'OBJECTID', 'Serial', 'FireNumber', 'FiscalYear', 'FireArea',
    'Current_District', 'Current_Unit', 'FireName',
    'PreparedBy', 'LandmarkLocation', 'Equipment', 'NumberPersonnel',
    'CreationDate', 'ModifiedBy', 'ModifiedDate',
    'Twn', 'Rng', 'Sec', 'Subdiv', 'County', 'LO_Name', 'DiscoveredByName',
    'Specific',
    'Cause_Comments', 'Lead_Investigator', 'Degree_certain',
    'General_Restriction', 'Industrial_Restriction',
    'RegulatedUseZone', 'Involve_op', 'Notif_year', 'Notif_dist',
    'Notif_number', 'Report_DateTime', 'Attack_DateTime',
    'Control_DateTime', 'Walk_In_Delay', 'DS_Cost', 'ES_Cost', 'PC_Cost',
    'OA_Cost', 'TotalCost', 'Est_Damage', 'CostRecovery',
    'AttackBy', 'AttackType', 'Size_class', 'Size_prot', 'Homes_Saved',
    'Homes_Lost', 'Structures_Saved', 'Structures_Lost',
    'Number_of_Injuries', 'Number_of_Deaths', 'CauseType',
]
# Columns set aside for later use; they are dropped here as well.
use_later = ["Burn_Index", "WeatherStationID"]

# Drop both groups in place, in the same order as they are declared.
historical_fires.drop(columns=drop_columns, inplace=True)
historical_fires.drop(columns=use_later, inplace=True)
# Parse the two timestamp columns into pandas datetimes.
time_columns = ['Ign_DateTime', 'Discover_DateTime']
for stamp_col in time_columns:
    historical_fires[stamp_col] = pd.to_datetime(historical_fires[stamp_col])

# Day of the year (1-366) on which the fire was discovered.
discovered_at = historical_fires['Discover_DateTime']
historical_fires['fire_dayofyear'] = discovered_at.dt.dayofyear

# Replace every remaining missing value with 0.
historical_fires = historical_fires.fillna(0)

# The raw timestamps are no longer needed once the day of year is extracted.
historical_fires.drop(columns=time_columns, inplace=True)
# Summary statistics of the prediction target: the area burnt by each fire, in acres.
historical_fires['Size_acres'].describe()
count 64053.000000 mean 81.130331 std 3343.144286 min 0.000000 25% 0.050000 50% 0.100000 75% 0.500000 max 499945.000000 Name: Size_acres, dtype: float64
# Count the missing values left in each column (all zeros are expected after the fill).
missing_mask = historical_fires.isna()
missing_mask.sum()
X 0 Y 0 FireYear 0 FireCategory 0 Longitude 0 Latitude 0 Protection_agency 0 Federal_lands 0 Land_Class 0 Land_Class_tg 0 Minimum 0 Dual 0 FO_Land_Owner 0 SurchargeLot 0 SurchargeLotAssessed 0 SB_360_Lot 0 SB_360_Liable 0 Discovered_By 0 Caused_by 0 General 0 FEMA 0 Flame_length 0 Size_at_attack 0 Behavior 0 Fuel 0 Topography 0 Aspect 0 Slope 0 Elevation 0 Size_acres 0 fire_dayofyear 0 dtype: int64
import matplotlib.pyplot as plt
import numpy as np
# Histogram of the target (fire size in acres) drawn on log-log axes.
# The sizes are strongly right-skewed (median 0.1 acres, maximum close to
# 500,000 acres), so logarithmically spaced bins are used to make the tail visible.
log_bins = np.logspace(-4, 6, 50)
plt.hist(historical_fires['Size_acres'], bins=log_bins)
axes = plt.gca()
axes.set_yscale("log")
axes.set_xscale("log")
plt.ylabel('Number of Fires')
plt.xlabel('Fire Size')
plt.show()
# !pip install plotly-express
import plotly.express as px

# Fire locations, coloured by the year of the fire.
fig = px.scatter(
    historical_fires,
    x="Longitude",
    y="Latitude",
    color="FireYear",
    opacity=0.2,
)
fig.show()
Often, when working with data that includes latitudes and longitudes, the point (0, 0), known as "Null Island", is a marker for potentially erroneous data.
# Count the rows located at "Null Island" (longitude 0 and latitude 0): 7200 observations.
# Missing values were replaced with 0 earlier, so rows without coordinates end up here.
len(historical_fires.query('Longitude==0 & Latitude==0'))
7200
# 0.01st and 99.99th percentiles of latitude: the central 99.98% of the values lie between ...
np.percentile(historical_fires['Latitude'], 0.01), np.percentile(historical_fires['Latitude'], 99.99)
(0.0, 46.23157843999999)
# 0.01st and 99.99th percentiles of longitude: the central 99.98% of the values lie between ...
np.percentile(historical_fires['Longitude'], 0.01), np.percentile(historical_fires['Longitude'], 99.99)
(-124.52756880000001, 0.0)
# Remove the Null Island rows: keep only fires with a positive latitude and a
# negative longitude (this also discards any row with just one coordinate at 0).
# `.copy()` makes the result an independent frame, so that columns assigned to
# it afterwards are not written to a slice of the unfiltered data
# (which would trigger pandas' SettingWithCopyWarning).
historical_fires = historical_fires[(historical_fires['Latitude'] > 0) &
                                    (historical_fires['Longitude'] < 0)].copy()
# Arguments shared by the two plain scatter plots below.
lon_lat_year = {"x": "Longitude", "y": "Latitude", "color": "FireYear"}

# Relationship between latitude/longitude and the year in which the fire occurred
fig = px.scatter(historical_fires, opacity=0.2, **lon_lat_year)
fig.show()

# Same relationship, with the marker size showing the fire size (in acres)
fig = px.scatter(historical_fires, size="Size_acres", opacity=0.9, **lon_lat_year)
fig.show()

# Fire locations overlaid on the stamen-terrain mapbox layout
# NOTE(review): recent Plotly documentation says the Stamen styles need a
# Stadia Maps token - confirm this style still renders in the target environment.
fig = px.scatter_mapbox(
    historical_fires,
    lat='Latitude',
    lon='Longitude',
    color='FireYear',
    opacity=0.1,
)
fig.update_layout(mapbox_style='stamen-terrain')
fig.show()
# Cluster the fire locations into 10 groups by longitude/latitude and store the
# cluster id of each fire in a new 'cluster' column.
from sklearn.cluster import KMeans
# `n_jobs` is no longer accepted by KMeans (deprecated in scikit-learn 0.23,
# removed in 1.0) and passing it raises a TypeError, so it is not passed here.
# `n_init` is pinned to the long-standing default of 10 restarts, and
# `random_state` is fixed so the cluster ids are reproducible between runs.
kmeans = KMeans(n_clusters=10, n_init=10, random_state=42)
historical_fires['cluster'] = kmeans.fit_predict(historical_fires[['Longitude', 'Latitude']])
px.scatter(historical_fires, x='Longitude', y='Latitude', color='cluster')
# Cluster the fires into 7 groups by burnt area (Size_acres) and plot the
# clusters at the fire locations. This is exploratory only: it works on a copy,
# so the 'cluster' column of historical_fires is left untouched.
from sklearn.cluster import KMeans
# `n_jobs` is no longer accepted by KMeans (deprecated in scikit-learn 0.23,
# removed in 1.0) and passing it raises a TypeError, so it is not passed here.
# `n_init` is pinned to the long-standing default of 10 restarts, and
# `random_state` is fixed so the cluster ids are reproducible between runs.
kmeans = KMeans(n_clusters=7, n_init=10, random_state=42)
explore = historical_fires.copy()
explore['cluster'] = kmeans.fit_predict(explore[['Size_acres']])
px.scatter(explore, x='Longitude', y='Latitude', color='cluster')
def engineer_features(df, max_cause_code=15, max_size_acres=500):
    """Return a cleaned, feature-engineered copy of the fire DataFrame.

    The input frame is not modified. The steps, in order, are:

    1. One-hot encode the ``cluster`` column into ``cluster_<id>`` columns
       (the original ``cluster`` column is kept).
    2. Keep only rows with ``Caused_by`` strictly below ``max_cause_code``.
    3. Keep only rows with ``Size_acres`` strictly below ``max_size_acres``.
    4. Drop the columns rejected as highly correlated with other columns
       (``Slope``, ``Aspect``, ``Topography``, ``X``, ``Y``) and the constant
       column ``FireCategory``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``cluster``, ``Caused_by``, ``Size_acres``,
        ``Slope``, ``Aspect``, ``Topography``, ``X``, ``Y`` and
        ``FireCategory``.
    max_cause_code : number, default 15
        Exclusive upper bound for ``Caused_by``; larger codes are treated as
        outliers/invalid values.
    max_size_acres : number, default 500
        Exclusive upper bound for ``Size_acres``; larger fires are treated as
        outliers.

    Returns
    -------
    pandas.DataFrame
        A new frame with the filtered rows and engineered columns.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    # One-hot encode the location cluster. `assign` returns a new frame, so the
    # caller's DataFrame is never written to.
    cluster_dummies = pd.get_dummies(df['cluster'], prefix='cluster')
    df = df.assign(**{name: cluster_dummies[name] for name in cluster_dummies})

    # Remove outlier/invalid values of the 'Caused_by' column
    df = df[df['Caused_by'] < max_cause_code]

    # The standard deviation of the fire size is very high; remove the outliers
    df = df[df['Size_acres'] < max_size_acres]

    # Based on the pandas-profiling report: features highly correlated with
    # other columns are rejected
    correlated_columns = ['Slope', 'Aspect', 'Topography', 'X', 'Y']
    # No usable variance - constant value
    unusable_variance = ['FireCategory']
    return df.drop(columns=correlated_columns + unusable_variance)
# Wrangle the dataframe using the feature-engineering function
historical_fires = engineer_features(historical_fires)

# Separate the target (burnt area in acres) from the predictors
Y = historical_fires['Size_acres']
X = historical_fires.drop(columns='Size_acres')

# Create the train and test data BEFORE scaling. Fitting the scaler on the
# whole data set would leak the test rows' mean/variance into the training
# inputs and make the test score look better than it is.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

# Normalise the data: fit on the training rows only, then apply the same
# transformation to the test rows.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Full feature matrix scaled with the training statistics; the name is kept
# for any later code that still refers to X_norm.
X_norm = sc.transform(X)

print("# Train: {} , #Test: {}".format(X_train.shape[0], X_test.shape[0]))
print("# inputs: {}".format(X_train.shape[1]))
# Number of input features
n = X_train.shape[1]
# Train: 41381 , #Test: 10346 # inputs: 35
# Cubehelix colour map; it is not passed to the plot below (which uses "Set1").
cmap = sns.cubehelix_palette(dark=.3, light=.8, as_cmap=True)
# Fire locations: the colour encodes the cause code, the marker size the burnt area.
ax = sns.scatterplot(
    data=historical_fires,
    x="Longitude",
    y="Latitude",
    hue="Caused_by",
    size="Size_acres",
    sizes=(20, 200),
    palette="Set1",
)
# Build a pandas-profiling report of the training features.
# NOTE(review): read top to bottom, X_train has already been standard-scaled
# earlier in this file, whereas the report captured below shows named,
# unscaled columns (including X, Y and FireCategory) - presumably it was
# produced from a different notebook state; confirm before relying on it.
import pandas_profiling
pandas_profiling.ProfileReport(X_train)
Dataset info
| Number of variables | 42 |
|---|---|
| Number of observations | 41669 |
| Total Missing (%) | 0.0% |
| Total size in memory | 10.4 MiB |
| Average record size in memory | 262.0 B |
Variables types
| Numeric | 19 |
|---|---|
| Categorical | 0 |
| Boolean | 17 |
| Date | 0 |
| Text (Unique) | 0 |
| Rejected | 6 |
| Unsupported | 0 |
Warnings
- Aspect is highly correlated with Topography (ρ = 0.99903) [Rejected]
- Behavior has 20800 / 49.9% zeros [Zeros]
- Elevation is highly correlated with Slope (ρ = 0.99764) [Rejected]
- Federal_lands has 3469 / 8.3% zeros [Zeros]
- FireCategory has constant value 1 [Rejected]
- Flame_length has 29522 / 70.8% zeros [Zeros]
- Land_Class_tg has 20772 / 49.9% zeros [Zeros]
- Latitude is highly correlated with Y (ρ = 0.99991) [Rejected]
- Longitude is highly correlated with X (ρ = 0.99964) [Rejected]
- SB_360_Lot has 38352 / 92.0% zeros [Zeros]
- Size_at_attack is highly skewed (γ1 = 181.37) [Skewed]
- Size_at_attack has 21569 / 51.8% zeros [Zeros]
- Slope is highly correlated with Aspect (ρ = 0.99941) [Rejected]
- cluster has 4815 / 11.6% zeros [Zeros]

Aspect
Highly correlated
This variable is highly correlated with Topography and should be ignored for analysis
| Correlation | 0.99903 |
|---|
Behavior
Numeric
| Distinct count | 9 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 1.0486 |
|---|---|
| Minimum | 0 |
| Maximum | 9 |
| Zeros (%) | 49.9% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 1 |
| Q3 | 2 |
| 95-th percentile | 3 |
| Maximum | 9 |
| Range | 9 |
| Interquartile range | 2 |
Descriptive statistics
| Standard deviation | 1.4522 |
|---|---|
| Coef of variation | 1.3849 |
| Kurtosis | 11.138 |
| Mean | 1.0486 |
| MAD | 1.0608 |
| Skewness | 2.6526 |
| Sum | 43695 |
| Variance | 2.1089 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 20800 | 49.9% |
|
| 2.0 | 11689 | 28.1% |
|
| 1.0 | 5973 | 14.3% |
|
| 3.0 | 1786 | 4.3% |
|
| 4.0 | 682 | 1.6% |
|
| 9.0 | 601 | 1.4% |
|
| 6.0 | 61 | 0.1% |
|
| 7.0 | 49 | 0.1% |
|
| 5.0 | 28 | 0.1% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 20800 | 49.9% |
|
| 1.0 | 5973 | 14.3% |
|
| 2.0 | 11689 | 28.1% |
|
| 3.0 | 1786 | 4.3% |
|
| 4.0 | 682 | 1.6% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 4.0 | 682 | 1.6% |
|
| 5.0 | 28 | 0.1% |
|
| 6.0 | 61 | 0.1% |
|
| 7.0 | 49 | 0.1% |
|
| 9.0 | 601 | 1.4% |
|
Caused_by
Numeric
| Distinct count | 15 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 4.9731 |
|---|---|
| Minimum | 0 |
| Maximum | 14 |
| Zeros (%) | 0.4% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 1 |
| Median | 5 |
| Q3 | 8 |
| 95-th percentile | 13 |
| Maximum | 14 |
| Range | 14 |
| Interquartile range | 7 |
Descriptive statistics
| Standard deviation | 3.8403 |
|---|---|
| Coef of variation | 0.7722 |
| Kurtosis | -0.70066 |
| Mean | 4.9731 |
| MAD | 3.1701 |
| Skewness | 0.61847 |
| Sum | 207230 |
| Variance | 14.748 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 1.0 | 13559 | 32.5% |
|
| 5.0 | 7390 | 17.7% |
|
| 9.0 | 5026 | 12.1% |
|
| 13.0 | 3293 | 7.9% |
|
| 7.0 | 2432 | 5.8% |
|
| 2.0 | 2255 | 5.4% |
|
| 6.0 | 2070 | 5.0% |
|
| 3.0 | 1824 | 4.4% |
|
| 10.0 | 1296 | 3.1% |
|
| 4.0 | 1100 | 2.6% |
|
| Other values (5) | 1424 | 3.4% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 161 | 0.4% |
|
| 1.0 | 13559 | 32.5% |
|
| 2.0 | 2255 | 5.4% |
|
| 3.0 | 1824 | 4.4% |
|
| 4.0 | 1100 | 2.6% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 10.0 | 1296 | 3.1% |
|
| 11.0 | 262 | 0.6% |
|
| 12.0 | 113 | 0.3% |
|
| 13.0 | 3293 | 7.9% |
|
| 14.0 | 91 | 0.2% |
|
Discovered_By
Numeric
| Distinct count | 14 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 8.2852 |
|---|---|
| Minimum | 0 |
| Maximum | 99 |
| Zeros (%) | 0.4% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 3 |
| Median | 6 |
| Q3 | 8 |
| 95-th percentile | 9 |
| Maximum | 99 |
| Range | 99 |
| Interquartile range | 5 |
Descriptive statistics
| Standard deviation | 16.266 |
|---|---|
| Coef of variation | 1.9632 |
| Kurtosis | 26.498 |
| Mean | 8.2852 |
| MAD | 5.6874 |
| Skewness | 5.2694 |
| Sum | 345230 |
| Variance | 264.57 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 6.0 | 12197 | 29.3% |
|
| 8.0 | 6086 | 14.6% |
|
| 1.0 | 3730 | 9.0% |
|
| 3.0 | 3602 | 8.6% |
|
| 9.0 | 3175 | 7.6% |
|
| 7.0 | 3090 | 7.4% |
|
| 2.0 | 2952 | 7.1% |
|
| 4.0 | 2917 | 7.0% |
|
| 5.0 | 1951 | 4.7% |
|
| 99.0 | 1270 | 3.0% |
|
| Other values (4) | 699 | 1.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 150 | 0.4% |
|
| 1.0 | 3730 | 9.0% |
|
| 2.0 | 2952 | 7.1% |
|
| 3.0 | 3602 | 8.6% |
|
| 4.0 | 2917 | 7.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 9.0 | 3175 | 7.6% |
|
| 10.0 | 475 | 1.1% |
|
| 11.0 | 73 | 0.2% |
|
| 13.0 | 1 | 0.0% |
|
| 99.0 | 1270 | 3.0% |
|
Dual
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.1204 |
|---|
| 0 |
36652
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 36652 | 88.0% |
|
| 1 | 5017 | 12.0% |
|
Elevation
Highly correlated
This variable is highly correlated with Slope and should be ignored for analysis
| Correlation | 0.99764 |
|---|
FEMA
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.054237 |
|---|
| 0 |
39409
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 39409 | 94.6% |
|
| 1 | 2260 | 5.4% |
|
FO_Land_Owner
Numeric
| Distinct count | 22 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 18.305 |
|---|---|
| Minimum | 0 |
| Maximum | 90 |
| Zeros (%) | 0.1% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 10 |
| Q1 | 10 |
| Median | 11 |
| Q3 | 20 |
| 95-th percentile | 50 |
| Maximum | 90 |
| Range | 90 |
| Interquartile range | 10 |
Descriptive statistics
| Standard deviation | 15.955 |
|---|---|
| Coef of variation | 0.87165 |
| Kurtosis | 5.0835 |
| Mean | 18.305 |
| MAD | 11.361 |
| Skewness | 2.2726 |
| Sum | 762740 |
| Variance | 254.57 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 10.0 | 19695 | 47.3% |
|
| 13.0 | 4951 | 11.9% |
|
| 50.0 | 4444 | 10.7% |
|
| 11.0 | 3978 | 9.5% |
|
| 20.0 | 2959 | 7.1% |
|
| 12.0 | 2047 | 4.9% |
|
| 30.0 | 1540 | 3.7% |
|
| 40.0 | 734 | 1.8% |
|
| 90.0 | 573 | 1.4% |
|
| 70.0 | 194 | 0.5% |
|
| Other values (12) | 554 | 1.3% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 59 | 0.1% |
|
| 1.0 | 172 | 0.4% |
|
| 2.0 | 24 | 0.1% |
|
| 3.0 | 9 | 0.0% |
|
| 4.0 | 3 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 50.0 | 4444 | 10.7% |
|
| 60.0 | 118 | 0.3% |
|
| 70.0 | 194 | 0.5% |
|
| 80.0 | 9 | 0.0% |
|
| 90.0 | 573 | 1.4% |
|
Federal_lands
Numeric
| Distinct count | 4 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 1.0473 |
|---|---|
| Minimum | 0 |
| Maximum | 3 |
| Zeros (%) | 8.3% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 1 |
| Median | 1 |
| Q3 | 1 |
| 95-th percentile | 3 |
| Maximum | 3 |
| Range | 3 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.56382 |
|---|---|
| Coef of variation | 0.53838 |
| Kurtosis | 5.8411 |
| Mean | 1.0473 |
| MAD | 0.2537 |
| Skewness | 1.792 |
| Sum | 43638 |
| Variance | 0.31789 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 1.0 | 34978 | 83.9% |
|
| 0.0 | 3469 | 8.3% |
|
| 3.0 | 2216 | 5.3% |
|
| 2.0 | 1006 | 2.4% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 3469 | 8.3% |
|
| 1.0 | 34978 | 83.9% |
|
| 2.0 | 1006 | 2.4% |
|
| 3.0 | 2216 | 5.3% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 3469 | 8.3% |
|
| 1.0 | 34978 | 83.9% |
|
| 2.0 | 1006 | 2.4% |
|
| 3.0 | 2216 | 5.3% |
|
FireCategory
Constant
This variable is constant and should be ignored for analysis
| Constant value | 1 |
|---|
FireYear
Numeric
| Distinct count | 59 |
|---|---|
| Unique (%) | 0.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 1992 |
|---|---|
| Minimum | 1960 |
| Maximum | 2019 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1960 |
|---|---|
| 5-th percentile | 1969 |
| Q1 | 1979 |
| Median | 1992 |
| Q3 | 2005 |
| 95-th percentile | 2016 |
| Maximum | 2019 |
| Range | 59 |
| Interquartile range | 26 |
Descriptive statistics
| Standard deviation | 15.017 |
|---|---|
| Coef of variation | 0.0075386 |
| Kurtosis | -1.1464 |
| Mean | 1992 |
| MAD | 12.864 |
| Skewness | 0.064528 |
| Sum | 83005611 |
| Variance | 225.51 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 1987 | 1249 | 3.0% |
|
| 1970 | 1219 | 2.9% |
|
| 1992 | 1207 | 2.9% |
|
| 1994 | 1043 | 2.5% |
|
| 1974 | 1014 | 2.4% |
|
| 1975 | 995 | 2.4% |
|
| 1973 | 986 | 2.4% |
|
| 2006 | 943 | 2.3% |
|
| 2001 | 934 | 2.2% |
|
| 1985 | 920 | 2.2% |
|
| Other values (49) | 31159 | 74.8% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1960 | 4 | 0.0% |
|
| 1961 | 6 | 0.0% |
|
| 1962 | 2 | 0.0% |
|
| 1963 | 2 | 0.0% |
|
| 1964 | 5 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 2015 | 771 | 1.9% |
|
| 2016 | 594 | 1.4% |
|
| 2017 | 812 | 1.9% |
|
| 2018 | 820 | 2.0% |
|
| 2019 | 408 | 1.0% |
|
Flame_length
Numeric
| Distinct count | 7 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 0.40308 |
|---|---|
| Minimum | 0 |
| Maximum | 6 |
| Zeros (%) | 70.8% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 1 |
| 95-th percentile | 2 |
| Maximum | 6 |
| Range | 6 |
| Interquartile range | 1 |
Descriptive statistics
| Standard deviation | 0.77548 |
|---|---|
| Coef of variation | 1.9239 |
| Kurtosis | 12.563 |
| Mean | 0.40308 |
| MAD | 0.57116 |
| Skewness | 2.9311 |
| Sum | 16796 |
| Variance | 0.60137 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 29522 | 70.8% |
|
| 1.0 | 9156 | 22.0% |
|
| 2.0 | 2068 | 5.0% |
|
| 3.0 | 530 | 1.3% |
|
| 4.0 | 183 | 0.4% |
|
| 6.0 | 132 | 0.3% |
|
| 5.0 | 78 | 0.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 29522 | 70.8% |
|
| 1.0 | 9156 | 22.0% |
|
| 2.0 | 2068 | 5.0% |
|
| 3.0 | 530 | 1.3% |
|
| 4.0 | 183 | 0.4% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 2.0 | 2068 | 5.0% |
|
| 3.0 | 530 | 1.3% |
|
| 4.0 | 183 | 0.4% |
|
| 5.0 | 78 | 0.2% |
|
| 6.0 | 132 | 0.3% |
|
Fuel
Numeric
| Distinct count | 15 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 15.588 |
|---|---|
| Minimum | 0 |
| Maximum | 99 |
| Zeros (%) | 0.4% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 3 |
| Median | 5 |
| Q3 | 9 |
| 95-th percentile | 99 |
| Maximum | 99 |
| Range | 99 |
| Interquartile range | 6 |
Descriptive statistics
| Standard deviation | 29.31 |
|---|---|
| Coef of variation | 1.8803 |
| Kurtosis | 4.1519 |
| Mean | 15.588 |
| MAD | 18.132 |
| Skewness | 2.456 |
| Sum | 649520 |
| Variance | 859.06 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 3.0 | 7048 | 16.9% |
|
| 5.0 | 5433 | 13.0% |
|
| 1.0 | 4938 | 11.9% |
|
| 8.0 | 4919 | 11.8% |
|
| 99.0 | 4529 | 10.9% |
|
| 2.0 | 3689 | 8.9% |
|
| 7.0 | 2590 | 6.2% |
|
| 12.0 | 2432 | 5.8% |
|
| 9.0 | 1920 | 4.6% |
|
| 6.0 | 1276 | 3.1% |
|
| Other values (5) | 2895 | 6.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 148 | 0.4% |
|
| 1.0 | 4938 | 11.9% |
|
| 2.0 | 3689 | 8.9% |
|
| 3.0 | 7048 | 16.9% |
|
| 4.0 | 214 | 0.5% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 10.0 | 1255 | 3.0% |
|
| 11.0 | 549 | 1.3% |
|
| 12.0 | 2432 | 5.8% |
|
| 13.0 | 729 | 1.7% |
|
| 99.0 | 4529 | 10.9% |
|
General
Numeric
| Distinct count | 11 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 3.8775 |
|---|---|
| Minimum | 0 |
| Maximum | 10 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 1 |
| Median | 4 |
| Q3 | 6 |
| 95-th percentile | 9 |
| Maximum | 10 |
| Range | 10 |
| Interquartile range | 5 |
Descriptive statistics
| Standard deviation | 2.5849 |
|---|---|
| Coef of variation | 0.66663 |
| Kurtosis | -0.91615 |
| Mean | 3.8775 |
| MAD | 2.2163 |
| Skewness | 0.43665 |
| Sum | 161570 |
| Variance | 6.6816 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 1.0 | 13587 | 32.6% |
|
| 6.0 | 7819 | 18.8% |
|
| 3.0 | 6375 | 15.3% |
|
| 4.0 | 4245 | 10.2% |
|
| 5.0 | 3292 | 7.9% |
|
| 9.0 | 3075 | 7.4% |
|
| 8.0 | 1610 | 3.9% |
|
| 7.0 | 802 | 1.9% |
|
| 2.0 | 784 | 1.9% |
|
| 10.0 | 77 | 0.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 3 | 0.0% |
|
| 1.0 | 13587 | 32.6% |
|
| 2.0 | 784 | 1.9% |
|
| 3.0 | 6375 | 15.3% |
|
| 4.0 | 4245 | 10.2% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 6.0 | 7819 | 18.8% |
|
| 7.0 | 802 | 1.9% |
|
| 8.0 | 1610 | 3.9% |
|
| 9.0 | 3075 | 7.4% |
|
| 10.0 | 77 | 0.2% |
|
Land_Class
Numeric
| Distinct count | 12 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 1.769 |
|---|---|
| Minimum | 0 |
| Maximum | 11 |
| Zeros (%) | 0.7% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 1 |
| Median | 1 |
| Q3 | 1 |
| 95-th percentile | 7 |
| Maximum | 11 |
| Range | 11 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 1.8189 |
|---|---|
| Coef of variation | 1.0282 |
| Kurtosis | 5.0591 |
| Mean | 1.769 |
| MAD | 1.1956 |
| Skewness | 2.473 |
| Sum | 73713 |
| Variance | 3.3083 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 1.0 | 31710 | 76.1% |
|
| 2.0 | 4072 | 9.8% |
|
| 7.0 | 1524 | 3.7% |
|
| 5.0 | 1170 | 2.8% |
|
| 6.0 | 748 | 1.8% |
|
| 4.0 | 747 | 1.8% |
|
| 8.0 | 744 | 1.8% |
|
| 3.0 | 350 | 0.8% |
|
| 0.0 | 296 | 0.7% |
|
| 9.0 | 218 | 0.5% |
|
| Other values (2) | 90 | 0.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 296 | 0.7% |
|
| 1.0 | 31710 | 76.1% |
|
| 2.0 | 4072 | 9.8% |
|
| 3.0 | 350 | 0.8% |
|
| 4.0 | 747 | 1.8% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 7.0 | 1524 | 3.7% |
|
| 8.0 | 744 | 1.8% |
|
| 9.0 | 218 | 0.5% |
|
| 10.0 | 89 | 0.2% |
|
| 11.0 | 1 | 0.0% |
|
Land_Class_tg
Numeric
| Distinct count | 3 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 0.57018 |
|---|---|
| Minimum | 0 |
| Maximum | 2 |
| Zeros (%) | 49.9% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 1 |
| Q3 | 1 |
| 95-th percentile | 2 |
| Maximum | 2 |
| Range | 2 |
| Interquartile range | 1 |
Descriptive statistics
| Standard deviation | 0.61843 |
|---|---|
| Coef of variation | 1.0846 |
| Kurtosis | -0.57603 |
| Mean | 0.57018 |
| MAD | 0.56847 |
| Skewness | 0.6035 |
| Sum | 23759 |
| Variance | 0.38245 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 20772 | 49.9% |
|
| 1.0 | 18035 | 43.3% |
|
| 2.0 | 2862 | 6.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 20772 | 49.9% |
|
| 1.0 | 18035 | 43.3% |
|
| 2.0 | 2862 | 6.9% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 20772 | 49.9% |
|
| 1.0 | 18035 | 43.3% |
|
| 2.0 | 2862 | 6.9% |
|
Latitude
Highly correlated
This variable is highly correlated with Y and should be ignored for analysis
| Correlation | 0.99991 |
|---|
Longitude
Highly correlated
This variable is highly correlated with X and should be ignored for analysis
| Correlation | 0.99964 |
|---|
Minimum
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.12388 |
|---|
| 0 |
36507
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 36507 | 87.6% |
|
| 1 | 5162 | 12.4% |
|
Protection_agency
Numeric
| Distinct count | 10 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 1.1153 |
|---|---|
| Minimum | 0 |
| Maximum | 9 |
| Zeros (%) | 0.3% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 1 |
| Q1 | 1 |
| Median | 1 |
| Q3 | 1 |
| 95-th percentile | 1 |
| Maximum | 9 |
| Range | 9 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.73505 |
|---|---|
| Coef of variation | 0.65905 |
| Kurtosis | 44.797 |
| Mean | 1.1153 |
| MAD | 0.23057 |
| Skewness | 6.5649 |
| Sum | 46474 |
| Variance | 0.54029 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 1.0 | 40286 | 96.7% |
|
| 6.0 | 491 | 1.2% |
|
| 3.0 | 314 | 0.8% |
|
| 5.0 | 157 | 0.4% |
|
| 0.0 | 142 | 0.3% |
|
| 7.0 | 107 | 0.3% |
|
| 4.0 | 91 | 0.2% |
|
| 2.0 | 46 | 0.1% |
|
| 9.0 | 30 | 0.1% |
|
| 8.0 | 5 | 0.0% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 142 | 0.3% |
|
| 1.0 | 40286 | 96.7% |
|
| 2.0 | 46 | 0.1% |
|
| 3.0 | 314 | 0.8% |
|
| 4.0 | 91 | 0.2% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 5.0 | 157 | 0.4% |
|
| 6.0 | 491 | 1.2% |
|
| 7.0 | 107 | 0.3% |
|
| 8.0 | 5 | 0.0% |
|
| 9.0 | 30 | 0.1% |
|
SB_360_Liable
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.0094795 |
|---|
| 0.0 |
41274
|
|---|---|
| 1.0 |
|
| Value | Count | Frequency (%) | |
| 0.0 | 41274 | 99.1% |
|
| 1.0 | 395 | 0.9% |
|
SB_360_Lot
Numeric
| Distinct count | 3 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 0.13478 |
|---|---|
| Minimum | 0 |
| Maximum | 2 |
| Zeros (%) | 92.0% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 0 |
| 95-th percentile | 2 |
| Maximum | 2 |
| Range | 2 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.47641 |
|---|---|
| Coef of variation | 3.5348 |
| Kurtosis | 10.238 |
| Mean | 0.13478 |
| MAD | 0.2481 |
| Skewness | 3.437 |
| Sum | 5616 |
| Variance | 0.22696 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 38352 | 92.0% |
|
| 2.0 | 2299 | 5.5% |
|
| 1.0 | 1018 | 2.4% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 38352 | 92.0% |
|
| 1.0 | 1018 | 2.4% |
|
| 2.0 | 2299 | 5.5% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 38352 | 92.0% |
|
| 1.0 | 1018 | 2.4% |
|
| 2.0 | 2299 | 5.5% |
|
Size_at_attack
Numeric
| Distinct count | 285 |
|---|---|
| Unique (%) | 0.7% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 3.0355 |
|---|---|
| Minimum | 0 |
| Maximum | 50815 |
| Zeros (%) | 51.8% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 0.1 |
| 95-th percentile | 1.5 |
| Maximum | 50815 |
| Range | 50815 |
| Interquartile range | 0.1 |
Descriptive statistics
| Standard deviation | 260.28 |
|---|---|
| Coef of variation | 85.748 |
| Kurtosis | 34968 |
| Mean | 3.0355 |
| MAD | 5.6317 |
| Skewness | 181.37 |
| Sum | 126480 |
| Variance | 67748 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 0.0 | 21569 | 51.8% |
|
| 0.01 | 7063 | 17.0% |
|
| 0.1 | 4409 | 10.6% |
|
| 0.25 | 1893 | 4.5% |
|
| 0.5 | 1022 | 2.5% |
|
| 1.0 | 793 | 1.9% |
|
| 0.2 | 539 | 1.3% |
|
| 2.0 | 427 | 1.0% |
|
| 0.02 | 409 | 1.0% |
|
| 1.5 | 290 | 0.7% |
|
| Other values (275) | 3255 | 7.8% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 21569 | 51.8% |
|
| 0.01 | 7063 | 17.0% |
|
| 0.02 | 409 | 1.0% |
|
| 0.03 | 110 | 0.3% |
|
| 0.04 | 48 | 0.1% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 3000.0 | 3 | 0.0% |
|
| 3723.0 | 1 | 0.0% |
|
| 7050.0 | 1 | 0.0% |
|
| 11615.0 | 1 | 0.0% |
|
| 50815.0 | 1 | 0.0% |
|
Slope
Highly correlated
This variable is highly correlated with Aspect and should be ignored for analysis
| Correlation | 0.99941 |
|---|
SurchargeLot
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.10295 |
|---|
| 0 |
37379
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 37379 | 89.7% |
|
| 1 | 4290 | 10.3% |
|
SurchargeLotAssessed
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.066812 |
|---|
| 0 |
38885
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 38885 | 93.3% |
|
| 1 | 2784 | 6.7% |
|
Topography
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.33677 |
|---|
| 0 |
27636
|
|---|---|
| 1 |
14033
|
| Value | Count | Frequency (%) | |
| 0 | 27636 | 66.3% |
|
| 1 | 14033 | 33.7% |
|
X
Numeric
| Distinct count | 31863 |
|---|---|
| Unique (%) | 76.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 881940 |
|---|---|
| Minimum | 224480 |
| Maximum | 2330100 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 224480 |
|---|---|
| 5-th percentile | 367460 |
| Q1 | 545730 |
| Median | 705170 |
| Q3 | 1074300 |
| 95-th percentile | 1936900 |
| Maximum | 2330100 |
| Range | 2105600 |
| Interquartile range | 528560 |
Descriptive statistics
| Standard deviation | 469600 |
|---|---|
| Coef of variation | 0.53246 |
| Kurtosis | 0.36579 |
| Mean | 881940 |
| MAD | 375690 |
| Skewness | 1.1456 |
| Sum | 36749000000 |
| Variance | 220520000000 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 526103.824146986 | 28 | 0.1% |
|
| 562406.384842515 | 25 | 0.1% |
|
| 536428.536745414 | 24 | 0.1% |
|
| 571746.639435694 | 22 | 0.1% |
|
| 546578.327099741 | 21 | 0.1% |
|
| 457387.64238844794 | 20 | 0.0% |
|
| 428648.02460630203 | 19 | 0.0% |
|
| 545210.855314955 | 19 | 0.0% |
|
| 623516.884514436 | 18 | 0.0% |
|
| 536614.990157485 | 17 | 0.0% |
|
| Other values (31853) | 41456 | 99.5% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 224481.370734915 | 2 | 0.0% |
|
| 226531.344816267 | 1 | 0.0% |
|
| 228714.39796587802 | 1 | 0.0% |
|
| 229226.158792645 | 1 | 0.0% |
|
| 231490.077427819 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 2270734.09120736 | 1 | 0.0% |
|
| 2271832.16240157 | 1 | 0.0% |
|
| 2288277.34776902 | 1 | 0.0% |
|
| 2293224.94717848 | 1 | 0.0% |
|
| 2330062.90485564 | 1 | 0.0% |
|
Y
Numeric
| Distinct count | 31863 |
|---|---|
| Unique (%) | 76.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 743900 |
|---|---|
| Minimum | 54599 |
| Maximum | 1656700 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 54599 |
|---|---|
| 5-th percentile | 155730 |
| Q1 | 295660 |
| Median | 701500 |
| Q3 | 1144000 |
| 95-th percentile | 1478800 |
| Maximum | 1656700 |
| Range | 1602100 |
| Interquartile range | 848320 |
Descriptive statistics
| Standard deviation | 454330 |
|---|---|
| Coef of variation | 0.61075 |
| Kurtosis | -1.2942 |
| Mean | 743900 |
| MAD | 403300 |
| Skewness | 0.25417 |
| Sum | 30997000000 |
| Variance | 206420000000 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 292139.513779521 | 28 | 0.1% |
|
| 264508.78083989 | 25 | 0.1% |
|
| 286449.714566931 | 24 | 0.1% |
|
| 696971.049212605 | 22 | 0.1% |
|
| 275555.029855639 | 21 | 0.1% |
|
| 161345.489501312 | 20 | 0.0% |
|
| 228183.73392388198 | 19 | 0.0% |
|
| 1643614.27559055 | 19 | 0.0% |
|
| 187974.796259835 | 18 | 0.0% |
|
| 297458.22473754 | 17 | 0.0% |
|
| Other values (31853) | 41456 | 99.5% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 54598.510826766505 | 1 | 0.0% |
|
| 66764.41568242009 | 1 | 0.0% |
|
| 90169.05479002 | 1 | 0.0% |
|
| 90816.4665354341 | 1 | 0.0% |
|
| 90865.3143044561 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1649814.66699475 | 1 | 0.0% |
|
| 1651909.77132545 | 1 | 0.0% |
|
| 1652731.50098425 | 1 | 0.0% |
|
| 1652765.31102362 | 3 | 0.0% |
|
| 1656677.58923885 | 1 | 0.0% |
|
cluster
Numeric
| Distinct count | 10 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 4.125 |
|---|---|
| Minimum | 0 |
| Maximum | 9 |
| Zeros (%) | 11.6% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 0 |
| Q1 | 2 |
| Median | 4 |
| Q3 | 7 |
| 95-th percentile | 9 |
| Maximum | 9 |
| Range | 9 |
| Interquartile range | 5 |
Descriptive statistics
| Standard deviation | 2.803 |
|---|---|
| Coef of variation | 0.6795 |
| Kurtosis | -1.065 |
| Mean | 4.125 |
| MAD | 2.3494 |
| Skewness | 0.23585 |
| Sum | 171886 |
| Variance | 7.8566 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 4 | 6562 | 15.7% |
|
| 2 | 5349 | 12.8% |
|
| 0 | 4815 | 11.6% |
|
| 3 | 4707 | 11.3% |
|
| 1 | 3713 | 8.9% |
|
| 9 | 3622 | 8.7% |
|
| 8 | 3587 | 8.6% |
|
| 5 | 3311 | 7.9% |
|
| 7 | 3239 | 7.8% |
|
| 6 | 2764 | 6.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0 | 4815 | 11.6% |
|
| 1 | 3713 | 8.9% |
|
| 2 | 5349 | 12.8% |
|
| 3 | 4707 | 11.3% |
|
| 4 | 6562 | 15.7% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 5 | 3311 | 7.9% |
|
| 6 | 2764 | 6.6% |
|
| 7 | 3239 | 7.8% |
|
| 8 | 3587 | 8.6% |
|
| 9 | 3622 | 8.7% |
|
cluster_0
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.11555 |
|---|
| 0 |
36854
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 36854 | 88.4% |
|
| 1 | 4815 | 11.6% |
|
cluster_1
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.089107 |
|---|
| 0 |
37956
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 37956 | 91.1% |
|
| 1 | 3713 | 8.9% |
|
cluster_2
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.12837 |
|---|
| 0 |
36320
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 36320 | 87.2% |
|
| 1 | 5349 | 12.8% |
|
cluster_3
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.11296 |
|---|
| 0 |
36962
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 36962 | 88.7% |
|
| 1 | 4707 | 11.3% |
|
cluster_4
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.15748 |
|---|
| 0 |
35107
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 35107 | 84.3% |
|
| 1 | 6562 | 15.7% |
|
cluster_5
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.07946 |
|---|
| 0 |
38358
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 38358 | 92.1% |
|
| 1 | 3311 | 7.9% |
|
cluster_6
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.066332 |
|---|
| 0 |
38905
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 38905 | 93.4% |
|
| 1 | 2764 | 6.6% |
|
cluster_7
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.077732 |
|---|
| 0 |
38430
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 38430 | 92.2% |
|
| 1 | 3239 | 7.8% |
|
cluster_8
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.086083 |
|---|
| 0 |
38082
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 38082 | 91.4% |
|
| 1 | 3587 | 8.6% |
|
cluster_9
Boolean
| Distinct count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Mean | 0.086923 |
|---|
| 0 |
38047
|
|---|---|
| 1 |
|
| Value | Count | Frequency (%) | |
| 0 | 38047 | 91.3% |
|
| 1 | 3622 | 8.7% |
|
fire_dayofyear
Numeric
| Distinct count | 362 |
|---|---|
| Unique (%) | 0.9% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 211.73 |
|---|---|
| Minimum | 0 |
| Maximum | 363 |
| Zeros (%) | 0.4% |
Quantile statistics
| Minimum | 0 |
|---|---|
| 5-th percentile | 129 |
| Q1 | 186 |
| Median | 214 |
| Q3 | 242 |
| 95-th percentile | 285 |
| Maximum | 363 |
| Range | 363 |
| Interquartile range | 56 |
Descriptive statistics
| Standard deviation | 48.275 |
|---|---|
| Coef of variation | 0.22801 |
| Kurtosis | 1.8562 |
| Mean | 211.73 |
| MAD | 36.111 |
| Skewness | -0.72874 |
| Sum | 8822400 |
| Variance | 2330.5 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 207.0 | 593 | 1.4% |
|
| 213.0 | 570 | 1.4% |
|
| 222.0 | 546 | 1.3% |
|
| 221.0 | 471 | 1.1% |
|
| 220.0 | 469 | 1.1% |
|
| 212.0 | 469 | 1.1% |
|
| 217.0 | 452 | 1.1% |
|
| 208.0 | 449 | 1.1% |
|
| 224.0 | 445 | 1.1% |
|
| 219.0 | 442 | 1.1% |
|
| Other values (352) | 36763 | 88.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.0 | 162 | 0.4% |
|
| 2.0 | 3 | 0.0% |
|
| 3.0 | 1 | 0.0% |
|
| 4.0 | 1 | 0.0% |
|
| 5.0 | 5 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 359.0 | 3 | 0.0% |
|
| 360.0 | 2 | 0.0% |
|
| 361.0 | 3 | 0.0% |
|
| 362.0 | 2 | 0.0% |
|
| 363.0 | 1 | 0.0% |
|
index
Numeric
| Distinct count | 41669 |
|---|---|
| Unique (%) | 100.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 31015 |
|---|---|
| Minimum | 1 |
| Maximum | 64052 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1 |
|---|---|
| 5-th percentile | 2877.4 |
| Q1 | 14101 |
| Median | 28361 |
| Q3 | 49577 |
| 95-th percentile | 61217 |
| Maximum | 64052 |
| Range | 64051 |
| Interquartile range | 35476 |
Descriptive statistics
| Standard deviation | 19380 |
|---|---|
| Coef of variation | 0.62488 |
| Kurtosis | -1.3213 |
| Mean | 31015 |
| MAD | 17010 |
| Skewness | 0.13852 |
| Sum | 1292347969 |
| Variance | 375590000 |
| Memory size | 325.6 KiB |
| Value | Count | Frequency (%) | |
| 2047 | 1 | 0.0% |
|
| 48541 | 1 | 0.0% |
|
| 52659 | 1 | 0.0% |
|
| 50610 | 1 | 0.0% |
|
| 56753 | 1 | 0.0% |
|
| 54704 | 1 | 0.0% |
|
| 9646 | 1 | 0.0% |
|
| 15789 | 1 | 0.0% |
|
| 13740 | 1 | 0.0% |
|
| 3499 | 1 | 0.0% |
|
| Other values (41659) | 41659 | 100.0% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1 | 1 | 0.0% |
|
| 2 | 1 | 0.0% |
|
| 3 | 1 | 0.0% |
|
| 4 | 1 | 0.0% |
|
| 9 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 64046 | 1 | 0.0% |
|
| 64048 | 1 | 0.0% |
|
| 64049 | 1 | 0.0% |
|
| 64050 | 1 | 0.0% |
|
| 64052 | 1 | 0.0% |
|
| X | Y | FireYear | FireCategory | Longitude | Latitude | Protection_agency | Federal_lands | Land_Class | Land_Class_tg | Minimum | Dual | FO_Land_Owner | SurchargeLot | SurchargeLotAssessed | SB_360_Lot | SB_360_Liable | Discovered_By | Caused_by | General | FEMA | Flame_length | Size_at_attack | Behavior | Fuel | Topography | Aspect | Slope | Elevation | fire_dayofyear | cluster | cluster_0 | cluster_1 | cluster_2 | cluster_3 | cluster_4 | cluster_5 | cluster_6 | cluster_7 | cluster_8 | cluster_9 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8262 | 1.596414e+06 | 9.844365e+05 | 1999 | 1 | -119.412 | 44.4456 | 1.0 | 1.0 | 2.0 | 2.0 | 0 | 0 | 20.0 | 0 | 0 | 0.0 | 0.0 | 2.0 | 1.0 | 1.0 | 0 | 3.0 | 5.00 | 3.0 | 6.0 | 1 | 1 | 1 | 1 | 215.0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 49257 | 5.106075e+05 | 2.815337e+05 | 2004 | 1 | -123.472 | 42.4824 | 1.0 | 1.0 | 1.0 | 1.0 | 0 | 0 | 20.0 | 0 | 0 | 0.0 | 0.0 | 6.0 | 13.0 | 3.0 | 0 | 2.0 | 0.75 | 3.0 | 5.0 | 1 | 1 | 1 | 1 | 224.0 | 4 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| 45688 | 7.859479e+05 | 1.249405e+06 | 2001 | 1 | -122.541 | 45.1597 | 1.0 | 1.0 | 1.0 | 1.0 | 1 | 1 | 13.0 | 1 | 0 | 0.0 | 0.0 | 9.0 | 5.0 | 6.0 | 0 | 1.0 | 0.00 | 1.0 | 1.0 | 1 | 1 | 1 | 1 | 222.0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 14671 | 5.275577e+05 | 1.803938e+05 | 1991 | 1 | -123.396 | 42.2068 | 1.0 | 1.0 | 1.0 | 0.0 | 0 | 0 | 50.0 | 0 | 0 | 0.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0 | 0.0 | 0.00 | 0.0 | 7.0 | 0 | 0 | 0 | 0 | 203.0 | 4 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| 59157 | 4.586384e+05 | 1.656392e+05 | 2014 | 1 | -123.648 | 42.1594 | 1.0 | 1.0 | 5.0 | 1.0 | 0 | 0 | 20.0 | 0 | 0 | 0.0 | 0.0 | 6.0 | 5.0 | 5.0 | 0 | 0.0 | 0.01 | 2.0 | 8.0 | 1 | 1 | 1 | 1 | 246.0 | 4 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
# Calculating baseline prediction errors
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import mean_squared_error as MSE

# Baseline: predict the mean of y_train for every test sample
y_base = np.full(y_test.shape[0], np.mean(y_train))

# Compute each metric once and reuse it for both the summary dict and the
# printed messages
base_mae = MAE(y_test, y_base)
base_mse = MSE(y_test, y_base)

# prediction_errors maps a model name to [MAE, MSE]
prediction_errors = {}
prediction_errors['Base'] = [base_mae, base_mse]

print(f'Mean absolute error for baseline prediction is {base_mae}')
# MSE is the mean *squared* error (the old message said "standard")
print(f'Mean squared error for baseline prediction is {base_mse}')
Mean absolute error for base prediction is 6.340975638593508 Mean standard error for base prediction is 611.4065088511755
# Fit an ordinary least-squares linear regression on the training split
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X=X_train, y=y_train)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
# MAE and MSE of the Linear Regression model on the test split
y_pred = lr.predict(X_test)
# Store as [MAE, MSE] under the model's name for the final summary table
prediction_errors['Linear Regression'] = [MAE(y_test, y_pred), MSE(y_test, y_pred)]
# Report the stored values instead of recomputing the metrics.
# MSE is the mean *squared* error (it was previously labelled "standard").
print(f'Mean absolute error for Linear Regression is {prediction_errors["Linear Regression"][0]}')
print(f'Mean squared error for Linear Regression is {prediction_errors["Linear Regression"][1]}')
Mean absolute error for Linear Regression is 5.613454464205515 Mean standard error for Linear Regression is 497.7229091110607
# Fit a 100-tree random forest regressor on the training split
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor(n_estimators=100)
rf.fit(X=X_train, y=y_train)
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
max_features='auto', max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, n_estimators=100,
n_jobs=None, oob_score=False, random_state=None,
verbose=0, warm_start=False)
# MAE and MSE of the Random Forest model on the test split
y_pred = rf.predict(X_test)
# Store as [MAE, MSE] under the model's name for the final summary table
prediction_errors['Random Forest'] = [MAE(y_test, y_pred), MSE(y_test, y_pred)]
# The messages previously said "Linear Regression" (copy-paste slip) and
# called the MSE a "standard" error; both are corrected here.
print(f'Mean absolute error for Random Forest is {prediction_errors["Random Forest"][0]}')
print(f'Mean squared error for Random Forest is {prediction_errors["Random Forest"][1]}')
Mean absolute error for Linear Regression is 4.920883348297547 Mean standard error for Linear Regression is 489.64086958981477
# Fit a Ridge (L2-regularised linear) regression with alpha=1.0 on the training split
from sklearn.linear_model import Ridge
rr = Ridge(alpha=1.0)
rr.fit(X=X_train, y=y_train)
Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
normalize=False, random_state=None, solver='auto', tol=0.001)
# MAE and MSE of the Ridge model on the test split
y_pred = rr.predict(X_test)
# Store as [MAE, MSE] under the model's name for the final summary table
prediction_errors['Ridge Regression'] = [MAE(y_test, y_pred), MSE(y_test, y_pred)]
# Report the stored values instead of recomputing the metrics.
# MSE is the mean *squared* error (it was previously labelled "standard").
print(f'Mean absolute error for Ridge is {prediction_errors["Ridge Regression"][0]}')
print(f'Mean squared error for Ridge is {prediction_errors["Ridge Regression"][1]}')
Mean absolute error for Ridge is 5.619662768105583 Mean standard error for Ridge is 497.723412149486
# Neural network to predict the area burned by fire in Oregon.
# Architecture: a stack of ReLU Dense layers, with 20% dropout after each of
# the wider hidden layers, ending in a single linear output unit (regression).
model = Sequential()
# Input layer; n is defined earlier in the file
# (presumably the number of feature columns in X_train -- TODO confirm)
model.add(Dense(35, input_dim=n, kernel_initializer='normal', activation='relu'))
# Hidden layers that are each followed by dropout
for units in (70, 128, 256, 128, 56, 28):
    model.add(Dense(units, activation='relu'))
    model.add(Dropout(0.2))
# Narrow tail layers without dropout
for units in (14, 8):
    model.add(Dense(units, activation='relu'))
# Single linear output for the regression target
model.add(Dense(1, activation='linear'))
model.summary()
Model: "sequential_2" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense_20 (Dense) (None, 35) 1260 _________________________________________________________________ dense_21 (Dense) (None, 70) 2520 _________________________________________________________________ dropout_12 (Dropout) (None, 70) 0 _________________________________________________________________ dense_22 (Dense) (None, 128) 9088 _________________________________________________________________ dropout_13 (Dropout) (None, 128) 0 _________________________________________________________________ dense_23 (Dense) (None, 256) 33024 _________________________________________________________________ dropout_14 (Dropout) (None, 256) 0 _________________________________________________________________ dense_24 (Dense) (None, 128) 32896 _________________________________________________________________ dropout_15 (Dropout) (None, 128) 0 _________________________________________________________________ dense_25 (Dense) (None, 56) 7224 _________________________________________________________________ dropout_16 (Dropout) (None, 56) 0 _________________________________________________________________ dense_26 (Dense) (None, 28) 1596 _________________________________________________________________ dropout_17 (Dropout) (None, 28) 0 _________________________________________________________________ dense_27 (Dense) (None, 14) 406 _________________________________________________________________ dense_28 (Dense) (None, 8) 120 _________________________________________________________________ dense_29 (Dense) (None, 1) 9 ================================================================= Total params: 88,143 Trainable params: 88,143 Non-trainable params: 0 _________________________________________________________________
# Optimise mean absolute error with Adam; MSE and MAE are also tracked as metrics
model.compile(optimizer='adam', loss='mae', metrics=['mse','mae'])
# Train for 20 epochs, reserving 5% of the training data for validation
history = model.fit(
    X_train,
    y_train.values,
    batch_size=1028,
    epochs=20,
    validation_split=0.05,
    shuffle=True,
    verbose=2,
)
Train on 39311 samples, validate on 2070 samples Epoch 1/20 39311/39311 - 5s - loss: 3.8166 - mse: 605.4470 - mae: 3.8166 - val_loss: 2.8327 - val_mse: 284.7296 - val_mae: 2.8327 Epoch 2/20 39311/39311 - 2s - loss: 3.6482 - mse: 561.8547 - mae: 3.6482 - val_loss: 2.5907 - val_mse: 243.4479 - val_mae: 2.5907 Epoch 3/20 39311/39311 - 2s - loss: 3.3818 - mse: 527.8478 - mae: 3.3818 - val_loss: 2.3467 - val_mse: 233.6139 - val_mae: 2.3467 Epoch 4/20 39311/39311 - 2s - loss: 3.2943 - mse: 495.7027 - mae: 3.2943 - val_loss: 2.3197 - val_mse: 232.6433 - val_mae: 2.3197 Epoch 5/20 39311/39311 - 2s - loss: 3.2833 - mse: 490.7452 - mae: 3.2833 - val_loss: 2.3384 - val_mse: 233.3045 - val_mae: 2.3384 Epoch 6/20 39311/39311 - 2s - loss: 3.2603 - mse: 490.4917 - mae: 3.2603 - val_loss: 2.2713 - val_mse: 233.0452 - val_mae: 2.2713 Epoch 7/20 39311/39311 - 2s - loss: 3.2743 - mse: 499.8663 - mae: 3.2743 - val_loss: 2.3355 - val_mse: 239.2533 - val_mae: 2.3355 Epoch 8/20 39311/39311 - 3s - loss: 3.2850 - mse: 498.4460 - mae: 3.2850 - val_loss: 2.2677 - val_mse: 233.7367 - val_mae: 2.2677 Epoch 9/20 39311/39311 - 2s - loss: 3.2476 - mse: 495.4905 - mae: 3.2476 - val_loss: 2.2567 - val_mse: 234.5774 - val_mae: 2.2567 Epoch 10/20 39311/39311 - 2s - loss: 3.2536 - mse: 500.7908 - mae: 3.2536 - val_loss: 2.2573 - val_mse: 233.0869 - val_mae: 2.2573 Epoch 11/20 39311/39311 - 3s - loss: 3.2296 - mse: 489.6448 - mae: 3.2296 - val_loss: 2.2848 - val_mse: 231.4359 - val_mae: 2.2848 Epoch 12/20 39311/39311 - 3s - loss: 3.2342 - mse: 493.7725 - mae: 3.2342 - val_loss: 2.2386 - val_mse: 232.5520 - val_mae: 2.2386 Epoch 13/20 39311/39311 - 2s - loss: 3.2256 - mse: 490.4494 - mae: 3.2256 - val_loss: 2.2447 - val_mse: 234.2167 - val_mae: 2.2447 Epoch 14/20 39311/39311 - 2s - loss: 3.2158 - mse: 488.7504 - mae: 3.2158 - val_loss: 2.2661 - val_mse: 235.6802 - val_mae: 2.2661 Epoch 15/20 39311/39311 - 2s - loss: 3.1984 - mse: 485.0881 - mae: 3.1984 - val_loss: 2.2331 - val_mse: 231.9371 - val_mae: 
2.2331 Epoch 16/20 39311/39311 - 2s - loss: 3.2124 - mse: 489.5925 - mae: 3.2124 - val_loss: 2.2516 - val_mse: 235.3599 - val_mae: 2.2516 Epoch 17/20 39311/39311 - 2s - loss: 3.2108 - mse: 489.8666 - mae: 3.2108 - val_loss: 2.2273 - val_mse: 232.0704 - val_mae: 2.2273 Epoch 18/20 39311/39311 - 2s - loss: 3.1988 - mse: 486.9035 - mae: 3.1988 - val_loss: 2.2414 - val_mse: 231.4764 - val_mae: 2.2414 Epoch 19/20 39311/39311 - 2s - loss: 3.1724 - mse: 474.0025 - mae: 3.1724 - val_loss: 2.2823 - val_mse: 235.5647 - val_mae: 2.2823 Epoch 20/20 39311/39311 - 2s - loss: 3.1886 - mse: 480.1780 - mae: 3.1886 - val_loss: 2.2775 - val_mse: 232.9865 - val_mae: 2.2775
# MAE and MSE of the neural network on the test split
y_pred = model.predict(X_test)
# Store as [MAE, MSE] under the model's name for the final summary table
prediction_errors['Neural Networks'] = [MAE(y_test, y_pred), MSE(y_test, y_pred)]
# Report the stored values instead of recomputing the metrics.
# MSE is the mean *squared* error (it was previously labelled "standard").
print(f'Mean absolute error for NN is {prediction_errors["Neural Networks"][0]}')
print(f'Mean squared error for NN is {prediction_errors["Neural Networks"][1]}')
Mean absolute error for NN is 3.128617542037161 Mean standard error for NN is 509.8695952644622
# Plot per-epoch training and validation loss for the neural network.
# The model was compiled with loss='mae', so both curves are mean absolute error.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
# Axis label spelling corrected ('Absolue' -> 'Absolute')
plt.ylabel('Loss: Mean Absolute Error')
plt.xlabel('Epoch')
# Legend entries follow the order of the plot() calls above
plt.legend(['Train', 'Val'], loc='upper right')
plt.show()
<matplotlib.legend.Legend at 0x1a3cd366a0>
# Summary of errors for all models used.
# prediction_errors maps each model name to [MAE, MSE]; as a DataFrame the
# models become columns and the two metrics become rows.
prediction_errors = pd.DataFrame(prediction_errors)
# Row labels corrected: the second metric is the mean *squared* error, and
# 'Absoluter' was a typo.
# NOTE(review): any later code selecting rows by the old labels must be updated.
prediction_errors.index = ['Mean Absolute Error', 'Mean Squared Error']
# Transpose for display: one row per model, one column per metric
prediction_errors.T
| Mean Absoluter Error | Mean Standard Error | |
|---|---|---|
| Base | 6.340976 | 611.406509 |
| Linear Regression | 5.613454 | 497.722909 |
| Random Forest | 4.920883 | 489.640870 |
| Ridge Regression | 5.619663 | 497.723412 |
| Neural Networks | 3.128618 | 509.869595 |